/*-------------------------<-- Start of Description -->-------------------------*\ | PURPOSE: This macro is used to compare multiple datasets and check for any | | similar variable names. It also does a comparison of the number of | | observations and prints a list of discrepencies of variable names | | and variable types. | |--------------------------<-- End of Description -->----------------------------| |--------------------------------------------------------------------------------| |------------------------<-- Start of Files Created -->--------------------------| | SYNTAX: %compare (indata = _SAS_Dataset_names_) | \-------------------------<-- End of Files Created-->---------------------------*/ %macro compare (indata=) /des='compares multiple datasets' ; %let notes=%sysfunc(getoption(notes, keyword)) ; options nonotes ; %local indata i merge numdata data obslst ndata mean min max notes ; %let starttime = %sysfunc(datetime()) ; /*---------------------------------\ | calculate variables and defaults | \---------------------------------*/ %let indata = %cmpres(&indata) ; %let numdata = %words(&indata) ; %let merge = %str() ; %let ndata = %str() ; /*------------------------------------------------\ | print error message if fewer than two datasets, | | greater than 9,999,999 datasets, or complete | | individual checks if criteria is met. | \------------------------------------------------*/ %if &numdata < 2 %then %do ; data _null_ ; put 'ERROR: Utility macro %COMPARE requires two or more input datasets.' ; put 'ERROR: The macro will stop executing.' ; run ; %goto the_end ; %end ; %else %if &numdata > 9999999 %then %do ; data _null_ ; put 'ERROR: Utility macro %COMPARE allows a maximum of 9,999,999 datasets.' ; put 'ERROR: The macro will stop executing.' ; run ; %goto the_end ; %end ; %else %do i = 1 %to &numdata ; /*------------------------------------------------------\ | the %do loop uses the %scan function to parse out each| | individual dataset name from the variable &indata. | \------------------------------------------------------*/ %let data = %data(%scan(&indata, &i, %str( ))) ; /*------------------------------------------------------------\ | proc contents outputs variables to c&i for check with other | | datasets from input string. note: cannot check more than | | 9,999,999 datasets (just in case someone actually tries!). | \------------------------------------------------------------*/ proc contents data=&data noprint out=c&i(keep = name format label length nobs type rename = (nobs=nobs&i format=f&i type=t&i length=l&i)); run; /*-----------------------------\ | create list of contents data | \-----------------------------*/ %let merge = &merge c&i (in=c&i); /*--------------------------------------------------\ | obslist is the list of variables of the number of | | observations. later in the program, the mean, | | min, and max observations will be calculated. | | the making of this list is broken into two parts: | | the first ends in a comma, and the last one does | | not. this allows the use of the min, mean, and | | max functions later. | \--------------------------------------------------*/ %if &i ne &numdata %then %let obslst = &obslst nobs&i,; %else %let obslst = &obslst nobs&i; %let ndata = %trim(%left(&ndata &data)); %end; %let indata = &ndata; /*----------------------------------------------------------\ | this datastep completes the comparison process by merging | | the data together. | \----------------------------------------------------------*/ data all (drop=nobs1-nobs&numdata min_obs max_obs mean_obs mtch err_lnt err_typ err_fmt ehold); merge &merge ; by name ; length count $&numdata.. ; label length = 'length' format = 'format' name = 'name' match = 'variable match' label = 'description' count = '1 = yes*0 = no*<-- scroll check -->' ; format min_obs max_obs mean_obs comma16. ; array there {&numdata} c1-c&numdata ; array l {&numdata} l1-l&numdata ; array t {&numdata} t1-t&numdata ; array f {&numdata} f1-f&numdata ; match = 'good ' ; do i = 1 to &numdata ; ehold = 'error = ' ; substr(count,i,1) = put(there{i},1.) ; if substr(count,i,1) = 0 then match = ehold||'m' ; else do ; mtch = ' ' ; if l1 ne l{&numdata} then do ; mtch = 'e' ; err_lnt = 'l' ; end ; if t1 ne t{&numdata} then do ; mtch = 'e' ; err_typ = 't' ; end ; if f1 ne f{&numdata} then do ; mtch = 'e' ; err_fmt = 'f' ; end ; if mtch = 'e' then match = ehold||err_lnt||err_typ ||err_fmt ; end ; end ; length = l1 ; format = l1 ; if t1 = 1 then type = 'num ' ; else if t1 = 2 then type = 'char ' ; else type = 'other' ; if _n_ = 1 then do ; min_obs = min(&obslst); max_obs = max(&obslst); mean_obs = round(mean(&obslst), .1) ; call symput ("min", min_obs) ; call symput ("max", max_obs) ; call symput ("mean", mean_obs) ; end ; run ; %let min = &min ; %let max = &max ; %let mean = &mean ; /*-------------\ | print output | \-------------*/ title1 "comparison of sas datasets" ; title2 "&indata" ; title4 "min obs = &min mean obs = &mean max obs = &max" ; proc print data=all label split='*'; var name count label length format type match; run; title4 "correct match of data?" ; proc freq data=all; tables match /missing; run; /*----------------------------------\ | prepare sas session for open code | \----------------------------------*/ proc datasets nolist memtype=data library=work; delete all %_prefix(c, 1 to &numdata); run; quit; %the_end:; options nonotes; %timenote (macro=checkrec, starttime=&starttime) %put; options ¬es; %mend compare;